Tensorliststack

将多个张量(Tensor)堆叠成一个更大的张量。此算子可以处理不同数据类型的张量,将它们按顺序拼接成一个连续的内存块。

\[\text{output\_data} = [\text{tensor}_1, \text{tensor}_2, \ldots, \text{tensor}_n]\]

其中每个张量的数据类型和元素数量可以不同。

输入:
  • tensor_num - 张量数量,tensor_num > 0

  • tensor_element_nums - 每个张量的元素数量(int* 类型)

  • tensor_data_type - 每个张量元素的数据类型,以单个元素所占的字节数表示(int* 类型),0 表示未知类型(kTypeUnknown)

  • tensor_data - 每个张量数据的起始地址(void** 类型)

  • output_data - 输出结果的数组起始位置(void* 类型)

  • unknown_type_offset - 未知类型数据在输出结果中的偏移量

  • core_mask - 核掩码(int),仅共享存储版本需要

输出:
  • output_data - 堆叠后的张量数据,按输入顺序连续存储

支持平台:

FT78NE MT7004

备注:

  • 该算子不区分具体的数据类型,数据类型信息通过tensor_data_type参数传递

  • 当tensor_data_type[i]为0(kTypeUnknown)时,算子会将输出内存清零

  • 当tensor_data_type[i]不为0时,算子会按字节复制数据

  • 调用前需要确保output_data指向的内存空间足够大以容纳所有张量数据

  • TensorList中不同Tensor的数据类型可能不同,各Tensor的类型信息通过tensor_data_type参数传入算子

共享存储版本:

/*
 * Shared-storage variants of the tensor-list stack operator.
 *
 * All five entry points take the same parameter list:
 *   tensor_num           number of tensors; must be > 0
 *   tensor_element_nums  element count of each tensor
 *   tensor_data_type     element type of each tensor, expressed as a size in
 *                        bytes; 0 means kTypeUnknown
 *   tensor_data          start address of each tensor's data
 *   output_data          start of the output array; the caller must make sure
 *                        it is large enough to hold all tensor data
 *   unknown_type_offset  offset of the unknown-type data within the output
 *   core_mask            core mask (only the shared-storage versions take it)
 *
 * NOTE(review): the meaning of the fp/i16/c64/hp/i32 prefixes is not stated
 * alongside these declarations - presumably an element-type family; confirm
 * against the operator library's header.
 */
void fp_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
void i16_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
void c64_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
void hp_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
void i32_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);

C调用示例(共享存储版本):

 1//FT78NE示例
 2#include <stdio.h>
 3#include <tensorliststack.h>
 4#include <string.h>
 5
 6int main(int argc, char* argv[]) {
 7    void* output_data = (void*)0x10010000;
 8    void* check_data = (void*)0x10020000;
 9    int tensor_num = 4; // 测试一个包含向量部分和尾部的数据长度
10
11    int tensor_element_nums[4] = {4096, 4096, 4096, 4096};
12    int tensor_data_type[4] = {4, 2, 4, 0};//4种数据类型
13
14    void* tensor_data[4] = {(void *)0x10030000, (void *)0x10040000, (void *)0x10050000, (void *)0x10060000};
15
16    srand(seed++);
17    // 初始化测试数据,包含各种情况
18    int i, j;
19
20    //tensor 1 int32
21    for(i = 0; i < tensor_element_nums[0]; i ++) {
22        ((int *)tensor_data[0])[i] = rand()%100;
23    }
24
25    //tensor 2 int16
26    for(i = 0; i < tensor_element_nums[1]; i ++) {
27        ((int16_t *)tensor_data[1])[i] = rand()%100;
28    }
29
30    //tensor 3 fp32
31    for (i = 0; i < tensor_element_nums[2]; i ++) {
32        ((float *)tensor_data[2])[i] = (float)rand()/RAND_MAX;
33    }
34
35    //tensor 4 fp16
36    for(i = 0; i < tensor_element_nums[3]; i ++) {
37        //类型为kTypeUnknown,不需要初始化
38    }
39    int core_mask = 0x0f;
40    fp_tensorlist_stack_s(tensor_num, tensor_element_nums, tensor_data_type, tensor_data, output_data, unknown_type_offset, core_mask);
41
42    return 0;
43}

私有存储版本:

/*
 * Private-storage variants of the tensor-list stack operator.
 *
 * All five entry points take the same parameter list:
 *   tensor_num           number of tensors; must be > 0
 *   tensor_element_nums  element count of each tensor
 *   tensor_data_type     element type of each tensor, expressed as a size in
 *                        bytes; 0 means kTypeUnknown
 *   tensor_data          start address of each tensor's data
 *   output_data          start of the output array; the caller must make sure
 *                        it is large enough to hold all tensor data
 *   unknown_type_offset  offset of the unknown-type data within the output
 *
 * NOTE(review): the meaning of the fp/i16/c64/hp/i32 prefixes is not stated
 * alongside these declarations - presumably an element-type family; confirm
 * against the operator library's header.
 */
void fp_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
void i16_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
void c64_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
void hp_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
void i32_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);

C调用示例(私有存储版本):

 1//FT78NE示例
 2#include <stdio.h>
 3#include <tensorliststack.h>
 4#include <string.h>
 5
 6int main(int argc, char* argv[]) {
 7    void* output_data = (void*)0x10010000;
 8    void* check_data = (void*)0x10020000;
 9    int tensor_num = 4; // 测试一个包含向量部分和尾部的数据长度
10
11    int tensor_element_nums[4] = {4096, 4096, 4096, 4096};
12    int tensor_data_type[4] = {4, 2, 4, 0};//4种数据类型
13
14    void* tensor_data[4] = {(void *)0x10030000, (void *)0x10040000, (void *)0x10050000, (void *)0x10060000};
15
16    srand(seed++);
17    // 初始化测试数据,包含各种情况
18    int i, j;
19
20    //tensor 1 int32
21    for(i = 0; i < tensor_element_nums[0]; i ++) {
22        ((int *)tensor_data[0])[i] = rand()%100;
23    }
24
25    //tensor 2 int16
26    for(i = 0; i < tensor_element_nums[1]; i ++) {
27        ((int16_t *)tensor_data[1])[i] = rand()%100;
28    }
29
30    //tensor 3 fp32
31    for (i = 0; i < tensor_element_nums[2]; i ++) {
32        ((float *)tensor_data[2])[i] = (float)rand()/RAND_MAX;
33    }
34
35    //tensor 4 fp16
36    for(i = 0; i < tensor_element_nums[3]; i ++) {
37        //类型为kTypeUnknown,不需要初始化
38    }
39
40    fp_tensorlist_stack_p(tensor_num, tensor_element_nums, tensor_data_type, tensor_data, output_data, unknown_type_offset);
41
42    return 0;
43}